My son’s first line of code:

# Benny's first line of code <3: ccx,,c 

—-Section: Formulating the question—-

#One may wonder what led me to conduct this analysis on gender. First and foremost, being a researcher of the original study, I started the analysis of the data with some pre-formed conceptions. Mainly, I knew the original study was chiefly concerned with participants' ability to detect bias from higher-level groups, like organizations. So, I tried to think of an analysis that would be different from the primary goals of the initial study, but also something that could provide useful information for researchers to consider in future studies. Furthermore, I did not want to work the problem of biased information the same way as the initial study: from higher-level groups down to smaller individual differences. Instead, I decided to explore a smaller subset of the data that focused on what I call lower-level groups, like gender. 
#I felt that gender was a fair compromise since it allowed for a small enough group of participants to consider and still provided individual participant information. I was also considering examining age, ethnicity, and gender together, but again, I felt this would be too similar to the approach of the initial study, which worked the problem of bias from a larger group to a smaller group. 

—-Section: Installing and loading packages—-

# install.packages('tidyverse')
# install.packages('knitr')
# install.packages('magrittr')
# install.packages('tidylog')
# install.packages('broom')
# install.packages('skimr')
# install.packages('ggdist')
# 
# 
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(tidylog)
## 
## Attaching package: 'tidylog'
## The following objects are masked from 'package:dplyr':
## 
##     add_count, add_tally, anti_join, count, distinct, distinct_all,
##     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
##     full_join, group_by, group_by_all, group_by_at, group_by_if,
##     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
##     relocate, rename, rename_all, rename_at, rename_if, rename_with,
##     right_join, sample_frac, sample_n, select, select_all, select_at,
##     select_if, semi_join, slice, slice_head, slice_max, slice_min,
##     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
##     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
##     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
##     transmute_if, ungroup
## The following objects are masked from 'package:tidyr':
## 
##     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
##     spread, uncount
## The following object is masked from 'package:stats':
## 
##     filter
library(broom)
library(skimr)
library(ggdist)
library(tibble)
library(plotly)
## 
## Attaching package: 'plotly'
## The following objects are masked from 'package:tidylog':
## 
##     distinct, filter, group_by, mutate, rename, select, slice,
##     summarise, transmute, ungroup
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

—-Section: Reading in the data—-

# Creating a variable to house the copy of the original csv
# Copy of data will be used to preserve original data and make comparisons later
# NOTE(review): every column is read as character (see the spec below) because
# the first two rows of the Qualtrics export are header/metadata text, not data;
# they are removed in the "Data cleaning" section before any analysis.
df_copy <- read_csv('CopyOfdataset_212.csv')
## Rows: 110 Columns: 85
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (85): StartDate, EndDate, Status, IPAddress, Progress, Duration (in seco...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

—-Section: Looking at the data—-

view(df_copy) # Viewing whole dataset in the spreadsheet-style data viewer
str(df_copy) # Viewing class of vars in dataset; confirms all 85 columns were read as character
## spec_tbl_df [110 x 85] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ StartDate            : chr [1:110] "Start Date" "{\"ImportId\":\"startDate\",\"timeZone\":\"America/Denver\"}" "12/1/2021 12:59" "12/1/2021 13:15" ...
##  $ EndDate              : chr [1:110] "End Date" "{\"ImportId\":\"endDate\",\"timeZone\":\"America/Denver\"}" "12/1/2021 13:11" "12/1/2021 13:37" ...
##  $ Status               : chr [1:110] "Response Type" "{\"ImportId\":\"status\"}" "IP Address" "IP Address" ...
##  $ IPAddress            : chr [1:110] "IP Address" "{\"ImportId\":\"ipAddress\"}" "169.236.78.22" "98.252.110.67" ...
##  $ Progress             : chr [1:110] "Progress" "{\"ImportId\":\"progress\"}" "100" "100" ...
##  $ Duration (in seconds): chr [1:110] "Duration (in seconds)" "{\"ImportId\":\"duration\"}" "707" "1331" ...
##  $ Finished             : chr [1:110] "Finished" "{\"ImportId\":\"finished\"}" "TRUE" "TRUE" ...
##  $ RecordedDate         : chr [1:110] "Recorded Date" "{\"ImportId\":\"recordedDate\",\"timeZone\":\"America/Denver\"}" "12/1/2021 13:11" "12/1/2021 13:37" ...
##  $ ResponseId           : chr [1:110] "Response ID" "{\"ImportId\":\"_recordId\"}" "R_8w9jt15B44Naulz" "R_DpfJvAfB8mGBGr7" ...
##  $ RecipientLastName    : chr [1:110] "Recipient Last Name" "{\"ImportId\":\"recipientLastName\"}" NA NA ...
##  $ RecipientFirstName   : chr [1:110] "Recipient First Name" "{\"ImportId\":\"recipientFirstName\"}" NA NA ...
##  $ RecipientEmail       : chr [1:110] "Recipient Email" "{\"ImportId\":\"recipientEmail\"}" NA NA ...
##  $ ExternalReference    : chr [1:110] "External Data Reference" "{\"ImportId\":\"externalDataReference\"}" NA NA ...
##  $ LocationLatitude     : chr [1:110] "Location Latitude" "{\"ImportId\":\"locationLatitude\"}" "37.29719543" "37.32569885" ...
##  $ LocationLongitude    : chr [1:110] "Location Longitude" "{\"ImportId\":\"locationLongitude\"}" "-120.4638977" "-120.4999008" ...
##  $ DistributionChannel  : chr [1:110] "Distribution Channel" "{\"ImportId\":\"distributionChannel\"}" "anonymous" "anonymous" ...
##  $ UserLanguage         : chr [1:110] "User Language" "{\"ImportId\":\"userLanguage\"}" "EN" "EN" ...
##  $ sonaid               : chr [1:110] "Welcome. To get SONA credit for this survey, you will need to give your SONA ID. Please enter your SONA ID belo"| __truncated__ "{\"ImportId\":\"QID1_TEXT\"}" "79324" "78481" ...
##  $ Q99_First Click      : chr [1:110] "Timing - First Click" "{\"ImportId\":\"QID99_FIRST_CLICK\"}" NA NA ...
##  $ Q99_Last Click       : chr [1:110] "Timing - Last Click" "{\"ImportId\":\"QID99_LAST_CLICK\"}" NA NA ...
##  $ Q99_Page Submit      : chr [1:110] "Timing - Page Submit" "{\"ImportId\":\"QID99_PAGE_SUBMIT\"}" NA NA ...
##  $ Q99_Click Count      : chr [1:110] "Timing - Click Count" "{\"ImportId\":\"QID99_CLICK_COUNT\"}" NA NA ...
##  $ cons_imm_low         : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID84\"}" "Mostly biased" "Neither biased or unbiased" ...
##  $ cons_imm_med         : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID86\"}" "Somewhat biased" "Somewhat unbiased" ...
##  $ cons_imm_hi          : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID90\"}" "Mostly biased" "Somewhat unbiased" ...
##  $ cons_crim_low        : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID89\"}" "Completely unbiased" "Completely unbiased" ...
##  $ cons_crim_med        : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID88\"}" "Completely biased" "Somewhat unbiased" ...
##  $ cons_crim_hi         : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID87\"}" "Completely biased" "Completely biased" ...
##  $ lib_imm_low          : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID91\"}" "Completely unbiased" "Somewhat unbiased" ...
##  $ lib_imm_med          : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID97\"}" "Neither biased or unbiased" "Somewhat biased" ...
##  $ lib_imm_hi           : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID96\"}" "Mostly unbiased" "Mostly biased" ...
##  $ lib_crim_low         : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID95\"}" "Completely unbiased" "Completely biased" ...
##  $ lib_crim_med         : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID94\"}" "Completely unbiased" "Completely biased" ...
##  $ lib_crim_hi          : chr [1:110] "Please choose the level of bias in the following headline:" "{\"ImportId\":\"QID93\"}" "Mostly unbiased" "Completely biased" ...
##  $ online_communities   : chr [1:110] "Do you view online communities as a place to:" "{\"ImportId\":\"QID19\"}" "Gain information, learn about the news" "Communicate with those outside of your direct community" ...
##  $ fact_sm              : chr [1:110] "How likely are you to \"fact check\" or do further research on the things you read on social media?" "{\"ImportId\":\"QID22\"}" "Somewhat" "Very" ...
##  $ fact_sm_how          : chr [1:110] "If you do fact check things, what process do you go through to do this?" "{\"ImportId\":\"QID58_TEXT\"}" "I use factual sources to look at data like government data, health data, and unbiased news outlet" "check the stats, Bloomberg news channel" ...
##  $ fact_online          : chr [1:110] "How likely are you to \"fact check\" or do further research on the things you read online?" "{\"ImportId\":\"QID23\"}" "Somewhat" "Very" ...
##  $ fact_online_how      : chr [1:110] "If you do fact check things you read online, how do you do go through this process?" "{\"ImportId\":\"QID56_TEXT\"}" "I look up data from government, health, and unbiased news sources/" "check the stats, outside resources... Ask people I know personally on the ground" ...
##  $ likely_click_1       : chr [1:110] "How likely are you to \"click\" on and read posts shared by: - Friends" "{\"ImportId\":\"QID24_1\"}" "Very" "Somewhat" ...
##  $ likely_click_2       : chr [1:110] "How likely are you to \"click\" on and read posts shared by: - Family" "{\"ImportId\":\"QID24_2\"}" "A little" "Somewhat" ...
##  $ likely_click_3       : chr [1:110] "How likely are you to \"click\" on and read posts shared by: - Online community group members" "{\"ImportId\":\"QID24_3\"}" "Very" "Somewhat" ...
##  $ likely_click_4       : chr [1:110] "How likely are you to \"click\" on and read posts shared by: - Celebrities" "{\"ImportId\":\"QID24_4\"}" "Not at all" "Not at all" ...
##  $ likely_click_5       : chr [1:110] "How likely are you to \"click\" on and read posts shared by: - Politcal Leaders" "{\"ImportId\":\"QID24_5\"}" "Very" "Not at all" ...
##  $ likely_click_6       : chr [1:110] "How likely are you to \"click\" on and read posts shared by: - News Companies" "{\"ImportId\":\"QID24_6\"}" "Very" "A little" ...
##  $ news_trust           : chr [1:110] "How much trust do you have in the news media to report accurately?" "{\"ImportId\":\"QID25\"}" "A moderate amount" "None at all" ...
##  $ effective_comm       : chr [1:110] "Do you think that news media organizations are effective in communicating information to the public?" "{\"ImportId\":\"QID26\"}" "A little" "A little" ...
##  $ discuss              : chr [1:110] "Do you ever discuss current events or news with family or friends?" "{\"ImportId\":\"QID52\"}" "Yes" "Yes" ...
##  $ dis_freq_1           : chr [1:110] "How many days in the past week did you talk about current events or news with family or friends? - Days" "{\"ImportId\":\"QID53_1\"}" "3" "5" ...
##  $ past_12_1            : chr [1:110] "During the past 12 months, have you done any of the followings: - Watched cable news" "{\"ImportId\":\"QID54_1\"}" "Have" "Have not" ...
##  $ past_12_2            : chr [1:110] "During the past 12 months, have you done any of the followings: - Read a news paper" "{\"ImportId\":\"QID54_2\"}" "Have not" "Have not" ...
##  $ past_12_3            : chr [1:110] "During the past 12 months, have you done any of the followings: - Watched private TV News stations (Fox, CNN, BBC, etc.)" "{\"ImportId\":\"QID54_3\"}" "Have not" "Have not" ...
##  $ past_12_4            : chr [1:110] "During the past 12 months, have you done any of the followings: - Read from news company websites online" "{\"ImportId\":\"QID54_4\"}" "Have" "Have" ...
##  $ past_12_5            : chr [1:110] "During the past 12 months, have you done any of the followings: - Read news from social media posts from professional sources" "{\"ImportId\":\"QID54_5\"}" "Have" "Have not" ...
##  $ past_12_6            : chr [1:110] "During the past 12 months, have you done any of the followings: - Read news from social media posts from friends or family" "{\"ImportId\":\"QID54_6\"}" "Have" "Have" ...
##  $ social_amt _1        : chr [1:110] "On average, how do you use the following types of social media? - Facebook" "{\"ImportId\":\"QID3_1\"}" "under 1 hr/day" "under 1 hr/day" ...
##  $ social_amt _2        : chr [1:110] "On average, how do you use the following types of social media? - Instagram" "{\"ImportId\":\"QID3_2\"}" "2+ hrs/day" "under 1 hr/day" ...
##  $ social_amt _3        : chr [1:110] "On average, how do you use the following types of social media? - Twitter" "{\"ImportId\":\"QID3_3\"}" "1-2 hrs/day" "none" ...
##  $ social_amt _4        : chr [1:110] "On average, how do you use the following types of social media? - TikTok" "{\"ImportId\":\"QID3_4\"}" "none" "none" ...
##  $ social_amt _5        : chr [1:110] "On average, how do you use the following types of social media? - Parler" "{\"ImportId\":\"QID3_5\"}" "none" "none" ...
##  $ social_amt _6        : chr [1:110] "On average, how do you use the following types of social media? - Reddit" "{\"ImportId\":\"QID3_6\"}" "none" "none" ...
##  $ social_amt _7        : chr [1:110] "On average, how do you use the following types of social media? - Youtube" "{\"ImportId\":\"QID3_7\"}" "2+ hrs/day" "2+ hrs/day" ...
##  $ social_why _1        : chr [1:110] "Please select your primary reason for using each social media platform: - Facebook" "{\"ImportId\":\"QID4_1\"}" "don't use" "unwind/relax" ...
##  $ social_why _2        : chr [1:110] "Please select your primary reason for using each social media platform: - Instagram" "{\"ImportId\":\"QID4_2\"}" "entertainment" "unwind/relax" ...
##  $ social_why _3        : chr [1:110] "Please select your primary reason for using each social media platform: - Twitter" "{\"ImportId\":\"QID4_3\"}" "news/gain information" "don't use" ...
##  $ social_why _4        : chr [1:110] "Please select your primary reason for using each social media platform: - TikTok" "{\"ImportId\":\"QID4_4\"}" "don't use" "don't use" ...
##  $ social_why _5        : chr [1:110] "Please select your primary reason for using each social media platform: - Parler" "{\"ImportId\":\"QID4_5\"}" "don't use" "don't use" ...
##  $ social_why _6        : chr [1:110] "Please select your primary reason for using each social media platform: - Reddit" "{\"ImportId\":\"QID4_6\"}" "don't use" "don't use" ...
##  $ social_why _7        : chr [1:110] "Please select your primary reason for using each social media platform: - Youtube" "{\"ImportId\":\"QID4_7\"}" "news/gain information" "entertainment" ...
##  $ ideology             : chr [1:110] "In political matters, people talk of \"the left\" and \"the right.\" How would you place your views on this 10-"| __truncated__ "{\"ImportId\":\"QID6\"}" "2" "6" ...
##  $ party                : chr [1:110] "Generally speaking, do you usually think of yourself as a Republican, a Democrat, an Independent, or what?" "{\"ImportId\":\"QID7\"}" "Independent" "Independent" ...
##  $ newsinterest_1       : chr [1:110] "Please indicate how the following statements apply to you (on a scale from \"not at all\" to \"a lot\" - I'm in"| __truncated__ "{\"ImportId\":\"QID8_1\"}" "9" "3" ...
##  $ newsinterest_2       : chr [1:110] "Please indicate how the following statements apply to you (on a scale from \"not at all\" to \"a lot\" - I disc"| __truncated__ "{\"ImportId\":\"QID8_2\"}" "8" "5" ...
##  $ newsinterest_3       : chr [1:110] "Please indicate how the following statements apply to you (on a scale from \"not at all\" to \"a lot\" - I part"| __truncated__ "{\"ImportId\":\"QID8_3\"}" "10" "5" ...
##  $ newsinterest_4       : chr [1:110] "Please indicate how the following statements apply to you (on a scale from \"not at all\" to \"a lot\" - How mu"| __truncated__ "{\"ImportId\":\"QID8_4\"}" "7" "10" ...
##  $ age                  : chr [1:110] "What is your age in years?" "{\"ImportId\":\"QID60_TEXT\"}" "20" "33" ...
##  $ gender               : chr [1:110] "What is your gender?" "{\"ImportId\":\"QID61\"}" "Female" "Female" ...
##  $ ethnicity            : chr [1:110] "How do you identify? [mark all that apply] - Selected Choice" "{\"ImportId\":\"QID62\"}" "Hispanic/Latino" "Asian-American/Pacific Islander,Other" ...
##  $ ethnicity_6_TEXT     : chr [1:110] "How do you identify? [mark all that apply] - Other - Text" "{\"ImportId\":\"QID62_6_TEXT\"}" "-99" "European (Spain)" ...
##  $ ethn_most            : chr [1:110] "Which do you most identify as? [select one] - Selected Choice" "{\"ImportId\":\"QID63\"}" "Hispanic/Latino" "Asian-American/Pacific Islander" ...
##  $ ethn_most_6_TEXT     : chr [1:110] "Which do you most identify as? [select one] - Other - Text" "{\"ImportId\":\"QID63_6_TEXT\"}" "-99" "-99" ...
##  $ comments             : chr [1:110] "__________________________\nYou have reached the end of our survey.  Thank you for your participation.Do you ha"| __truncated__ "{\"ImportId\":\"QID82_TEXT\"}" "Hello!" "-99" ...
##  $ id                   : chr [1:110] "id" "{\"ImportId\":\"id\"}" "79324" "78481" ...
##  $ FL_7_DO              : chr [1:110] "FL_7 - Block Randomizer - Display Order" "{\"ImportId\":\"FL_7_DO\"}" "BL_1MkFqd4DNpt0Iku" "BL_1MkFqd4DNpt0Iku" ...
##  $ headlines_DO         : chr [1:110] "headlines - Display Order" "{\"ImportId\":\"BL_9tRd5emXtaLggwC_DO\"}" "cons_imm_low|cons_imm_med|cons_imm_hi|cons_crim_low|cons_crim_med|cons_crim_hi|lib_imm_low|lib_imm_med |lib_imm"| __truncated__ "cons_imm_low|cons_imm_med|cons_imm_hi|cons_crim_low|cons_crim_med|cons_crim_hi|lib_imm_low|lib_imm_med |lib_imm"| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   StartDate = col_character(),
##   ..   EndDate = col_character(),
##   ..   Status = col_character(),
##   ..   IPAddress = col_character(),
##   ..   Progress = col_character(),
##   ..   `Duration (in seconds)` = col_character(),
##   ..   Finished = col_character(),
##   ..   RecordedDate = col_character(),
##   ..   ResponseId = col_character(),
##   ..   RecipientLastName = col_character(),
##   ..   RecipientFirstName = col_character(),
##   ..   RecipientEmail = col_character(),
##   ..   ExternalReference = col_character(),
##   ..   LocationLatitude = col_character(),
##   ..   LocationLongitude = col_character(),
##   ..   DistributionChannel = col_character(),
##   ..   UserLanguage = col_character(),
##   ..   sonaid = col_character(),
##   ..   `Q99_First Click` = col_character(),
##   ..   `Q99_Last Click` = col_character(),
##   ..   `Q99_Page Submit` = col_character(),
##   ..   `Q99_Click Count` = col_character(),
##   ..   cons_imm_low = col_character(),
##   ..   cons_imm_med = col_character(),
##   ..   cons_imm_hi = col_character(),
##   ..   cons_crim_low = col_character(),
##   ..   cons_crim_med = col_character(),
##   ..   cons_crim_hi = col_character(),
##   ..   lib_imm_low = col_character(),
##   ..   lib_imm_med = col_character(),
##   ..   lib_imm_hi = col_character(),
##   ..   lib_crim_low = col_character(),
##   ..   lib_crim_med = col_character(),
##   ..   lib_crim_hi = col_character(),
##   ..   online_communities = col_character(),
##   ..   fact_sm = col_character(),
##   ..   fact_sm_how = col_character(),
##   ..   fact_online = col_character(),
##   ..   fact_online_how = col_character(),
##   ..   likely_click_1 = col_character(),
##   ..   likely_click_2 = col_character(),
##   ..   likely_click_3 = col_character(),
##   ..   likely_click_4 = col_character(),
##   ..   likely_click_5 = col_character(),
##   ..   likely_click_6 = col_character(),
##   ..   news_trust = col_character(),
##   ..   effective_comm = col_character(),
##   ..   discuss = col_character(),
##   ..   dis_freq_1 = col_character(),
##   ..   past_12_1 = col_character(),
##   ..   past_12_2 = col_character(),
##   ..   past_12_3 = col_character(),
##   ..   past_12_4 = col_character(),
##   ..   past_12_5 = col_character(),
##   ..   past_12_6 = col_character(),
##   ..   `social_amt _1` = col_character(),
##   ..   `social_amt _2` = col_character(),
##   ..   `social_amt _3` = col_character(),
##   ..   `social_amt _4` = col_character(),
##   ..   `social_amt _5` = col_character(),
##   ..   `social_amt _6` = col_character(),
##   ..   `social_amt _7` = col_character(),
##   ..   `social_why _1` = col_character(),
##   ..   `social_why _2` = col_character(),
##   ..   `social_why _3` = col_character(),
##   ..   `social_why _4` = col_character(),
##   ..   `social_why _5` = col_character(),
##   ..   `social_why _6` = col_character(),
##   ..   `social_why _7` = col_character(),
##   ..   ideology = col_character(),
##   ..   party = col_character(),
##   ..   newsinterest_1 = col_character(),
##   ..   newsinterest_2 = col_character(),
##   ..   newsinterest_3 = col_character(),
##   ..   newsinterest_4 = col_character(),
##   ..   age = col_character(),
##   ..   gender = col_character(),
##   ..   ethnicity = col_character(),
##   ..   ethnicity_6_TEXT = col_character(),
##   ..   ethn_most = col_character(),
##   ..   ethn_most_6_TEXT = col_character(),
##   ..   comments = col_character(),
##   ..   id = col_character(),
##   ..   FL_7_DO = col_character(),
##   ..   headlines_DO = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
skim(df_copy) # Performs most of the above
Data summary
Name df_copy
Number of rows 110
Number of columns 85
_______________________
Column type frequency:
character 85
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
StartDate 0 1.00 10 52 0 106 0
EndDate 0 1.00 8 50 0 107 0
Status 0 1.00 10 21 0 3 0
IPAddress 0 1.00 10 24 0 53 0
Progress 0 1.00 1 23 0 6 0
Duration (in seconds) 0 1.00 2 23 0 99 0
Finished 0 1.00 4 23 0 4 0
RecordedDate 0 1.00 13 55 0 107 0
ResponseId 0 1.00 11 24 0 110 0
RecipientLastName 108 0.02 19 32 0 2 0
RecipientFirstName 108 0.02 20 33 0 2 0
RecipientEmail 108 0.02 15 29 0 2 0
ExternalReference 108 0.02 23 36 0 2 0
LocationLatitude 7 0.94 9 31 0 16 0
LocationLongitude 7 0.94 11 32 0 16 0
DistributionChannel 0 1.00 9 34 0 3 0
UserLanguage 0 1.00 2 27 0 3 0
sonaid 1 0.99 5 226 0 109 0
Q99_First Click 58 0.47 1 32 0 33 0
Q99_Last Click 58 0.47 1 31 0 33 0
Q99_Page Submit 58 0.47 5 32 0 52 0
Q99_Click Count 58 0.47 1 32 0 14 0
cons_imm_low 6 0.95 13 58 0 9 0
cons_imm_med 6 0.95 13 58 0 9 0
cons_imm_hi 6 0.95 13 58 0 8 0
cons_crim_low 6 0.95 13 58 0 9 0
cons_crim_med 6 0.95 13 58 0 9 0
cons_crim_hi 6 0.95 13 58 0 9 0
lib_imm_low 6 0.95 13 58 0 8 0
lib_imm_med 6 0.95 13 58 0 9 0
lib_imm_hi 7 0.94 13 58 0 9 0
lib_crim_low 7 0.94 13 58 0 9 0
lib_crim_med 7 0.94 13 58 0 9 0
lib_crim_hi 7 0.94 13 58 0 9 0
online_communities 7 0.94 20 57 0 7 0
fact_sm 7 0.94 4 97 0 7 0
fact_sm_how 13 0.88 6 304 0 97 0
fact_online 7 0.94 4 88 0 7 0
fact_online_how 18 0.84 6 192 0 88 0
likely_click_1 7 0.94 4 68 0 7 0
likely_click_2 7 0.94 4 67 0 7 0
likely_click_3 7 0.94 4 91 0 7 0
likely_click_4 7 0.94 4 72 0 7 0
likely_click_5 7 0.94 4 77 0 7 0
likely_click_6 7 0.94 4 75 0 7 0
news_trust 7 0.94 5 66 0 6 0
effective_comm 7 0.94 5 100 0 7 0
discuss 7 0.94 2 66 0 5 0
dis_freq_1 7 0.94 1 103 0 11 0
past_12_1 7 0.94 4 84 0 4 0
past_12_2 7 0.94 4 83 0 4 0
past_12_3 7 0.94 4 120 0 4 0
past_12_4 7 0.94 3 104 0 5 0
past_12_5 7 0.94 4 125 0 4 0
past_12_6 7 0.94 4 122 0 4 0
social_amt _1 7 0.94 3 74 0 7 0
social_amt _2 7 0.94 3 75 0 7 0
social_amt _3 7 0.94 4 73 0 6 0
social_amt _4 7 0.94 4 72 0 6 0
social_amt _5 7 0.94 4 72 0 5 0
social_amt _6 7 0.94 4 72 0 6 0
social_amt _7 7 0.94 3 73 0 7 0
social_why _1 7 0.94 9 82 0 7 0
social_why _2 7 0.94 9 83 0 7 0
social_why _3 7 0.94 3 81 0 8 0
social_why _4 7 0.94 9 80 0 7 0
social_why _5 7 0.94 3 80 0 7 0
social_why _6 7 0.94 9 80 0 7 0
social_why _7 7 0.94 9 81 0 7 0
ideology 7 0.94 1 139 0 12 0
party 7 0.94 3 106 0 6 0
newsinterest_1 7 0.94 1 127 0 14 0
newsinterest_2 7 0.94 1 140 0 14 0
newsinterest_3 7 0.94 1 135 0 14 0
newsinterest_4 7 0.94 1 155 0 14 0
age 7 0.94 2 26 0 14 0
gender 7 0.94 4 20 0 5 0
ethnicity 7 0.94 5 60 0 14 0
ethnicity_6_TEXT 7 0.94 3 57 0 8 0
ethn_most 7 0.94 5 61 0 8 0
ethn_most_6_TEXT 7 0.94 3 58 0 7 0
comments 8 0.93 2 254 0 24 0
id 0 1.00 2 17 0 110 0
FL_7_DO 1 0.99 9 39 0 4 0
headlines_DO 6 0.95 25 152 0 3 0

—-Section: Data cleaning—-

# ** Please see the associated section of this code within the "Final_Project_Report.docx" for a further explanation of the methods used here.
# Dropped rows, expressed as two ranges:
#   1:2     -- the Qualtrics ImportId row and the survey-question-text row
#   104:110 -- participants who did not complete all of the survey
df_copy <- df_copy[-c(1:2, 104:110), ]

—-Section: Data analyzations (part 1)—-

# Reading in original variable to new variable to be used for specific manipulations in this section.
df_copy1 <- df_copy

# social_amt _1 - 7
# Recoding hours spent on social media into rough ordinal scores.
# I only wanted a basic sum of the amount of usage for each participant, so the
# numbers loosely rank usage rather than measure exact hours:
#   2+ hrs/day     -> 3  (next number after the 1-2 range)
#   1-2 hrs/day    -> 2  (highest in range is 2)
#   under 1 hr/day -> 1  (rounded to 1)
#   none           -> 0  (no hours spent)
# How to see the distinct response levels present in a column
hello <- unique(df_copy1$`social_amt _1`)

# Recoding the social-media-hours columns.
# across() with a lambda replaces the deprecated mutate_at()/funs() combination
# (the old form triggered a dplyr deprecation warning), and starts_with()
# selects all seven `social_amt _*` columns without quoting each name.
df_copy1 <- df_copy1 %>%
  mutate(across(
    starts_with("social_amt"),
    ~ recode(.x,
             "2+ hrs/day" = 3,
             "1-2 hrs/day" = 2,
             "under 1 hr/day" = 1,
             "none" = 0,
             .default = NA_real_)
  ))

# Sum of recoded usage scores per participant.
# Columns are selected by name prefix instead of the fragile positional
# indices 56:62, so the code survives column reordering.
socialMediaUsage_sum <- rowSums(dplyr::select(df_copy1, starts_with("social_amt")),
                                na.rm = TRUE)

# Splitting data by gender (multi-select responses are comma-separated)
splitting_data1 <- strsplit(df_copy1$gender, split = ", ")

# New data frame pairing each participant's usage sum with each gender label;
# lengths() repeats the sum once per label produced by the split.
new_df1 <- data.frame(
  socialMediaUsage_sum = rep(socialMediaUsage_sum, lengths(splitting_data1)),
  gender = unlist(splitting_data1)
)

# Dropped any NA values, then counted participants per gender / usage-sum
# combination. .groups = "drop" silences the "grouped output" message and
# returns an ungrouped result ready for plotting.
amount_by_gender1 <- na.omit(new_df1) %>%
  group_by(gender, socialMediaUsage_sum) %>%
  summarise(count = n(), .groups = "drop")

# Renamed columns to be more intuitive to the observation I am trying to make
amount_by_gender1 <- amount_by_gender1 %>%
  rename(SocialMediaUse_sum_perParticipant = socialMediaUsage_sum,
         Count = count)

# Plotting results: one bar per gender, ordered and coloured by usage sum.
ggplot(amount_by_gender1, aes(Count, reorder(gender, SocialMediaUse_sum_perParticipant))) +
  geom_col(aes(fill = SocialMediaUse_sum_perParticipant)) +
  scale_fill_gradient2(low = "#d7191c",
                       high = "#2b83ba",
                       mid = "#ffffbf",
                       midpoint = median(amount_by_gender1$SocialMediaUse_sum_perParticipant)) +
  theme(axis.title.y = element_blank(),
        panel.background = element_rect(fill = "black"),
        panel.grid = element_line(colour = "#333333"))

—-Section: Data analyzations (part 2)—-

# Going to try to change variables from character to numeric.
# My hope is that it will be easier to make comparisons this way.
# Changing to numeric values to compare gender against how likely participants
# were to fact check information they came across.
# Resource: https://www.r-bloggers.com/2021/07/point-biserial-correlation-in-r-quick-guide/
df_copy2 <- df_copy

# Recoding likelihood to fact check onto a 0-4 ordinal scale.
# across() with a lambda replaces the deprecated mutate_at()/funs() form.
df_copy_recode1 <- df_copy2 %>%
  mutate(across(all_of("fact_online"),
                ~ recode(.x,
                         "Extremely" = 4,
                         "Very" = 3,
                         "Somewhat" = 2,
                         "A little" = 1,
                         "Not at all" = 0,
                         .default = NA_real_)))

# Recoding gender as a binary indicator (Male = 0, Female = 1).
# .default = NA_real_ makes the treatment of other responses (e.g. decline
# to answer) explicit, instead of relying on the "unreplaced values treated
# as NA" warning the original code produced.
df_copy_recode2 <- df_copy2 %>%
  mutate(across(all_of("gender"),
                ~ recode(.x, "Male" = 0, "Female" = 1, .default = NA_real_)))

# Point-biserial correlation: how likely one is to fact check, by gender
results_Gen_Fact <- cor.test(df_copy_recode2$gender, df_copy_recode1$fact_online)

# Plotting results
# "Decline to answer" only had one response so it doesn't populate as normal
# Little dot is an outlier possibly representing an arbitrary value
boxplot(fact_online ~ gender, data = df_copy_recode1)

—-Section: Data analyzations (part 3)—-

# Next, I wanted to see how often each gender discusses current events or news
# with family or friends.
# Each variable is followed with a 3 to denote it belonging to part 3.
df_copy3 <- df_copy

# Split by gender (multi-select responses are comma-separated)
splitting_data3 <- strsplit(df_copy3$gender, split = ", ")

# Create new data frame replicating the discuss value once per gender label
# produced by the split.
new_df3 <- data.frame(
  discuss = rep(df_copy3$discuss, lengths(splitting_data3)),
  gender = unlist(splitting_data3)
)

# Remove the survey's arbitrary missing-data code, drop NA rows, then count
# responses per gender. The earlier commented-out attempt filtered
# discuss != "99", which never matches the actual sentinel "-99" -- that is
# why it appeared to fail.
amount_by_gender3 <- new_df3 %>%
  filter(discuss != "-99") %>%
  na.omit() %>%
  group_by(gender, discuss) %>%
  summarise(count = n(), .groups = "drop")

# Renamed columns to be more intuitive to the observation I am trying to make
amount_by_gender3 <- amount_by_gender3 %>%
  rename(Yes_or_No = discuss, Count = count)

# Plotting
gender_discuss_plt3 <- ggplot(amount_by_gender3, aes(Yes_or_No, Count, colour = gender)) +
  geom_point(size = 5) +
  xlab('Discusses current events') +
  ylab('Amount') +
  ggtitle('Whether or not each gender discusses current events')
ggplotly(gender_discuss_plt3, dynamicTicks = TRUE)